# load AIS data by month and create trajectories

# to make sure the 64bit version is used
# C:\Python27\ArcGISx6410.5\python.exe createTracks_month.py

import arcpy
import os
import time
import calendar
import multiprocessing
from functools import partial
import TrackBuilderFunction_M as TB # track builder that is m-enabled (for storing speed)
import zipfile



prjFile = os.path.join(arcpy.GetInstallInfo()["InstallDir"],"Coordinate Systems/Geographic Coordinate Systems/World/WGS 1984.prj")
spatialRef = arcpy.SpatialReference(prjFile)

# allowing for files to be overwritten
arcpy.env.overwriteOutput=True

CORES = 4 # 8 

# locations and labels
data_dir = r"H:\My Drive\Boats\ReplicationCode\data\AIS"
arcpy.env.workspace = data_dir

# Location to Store Results
out_folder = r"H:\My Drive\Boats\ReplicationCode\data\AIS\lines_v2\%s"
out_gdb = "lines%02d.gdb" # output gdb

out_str = "westcoast5_lines_%s_%s" # year, month

tag = "Zone%s_%s_%02d" # (zone,year,month) name of folder to unzip

# folder location of input gdb
source_gdb_folder = r"%s\%02d_%s_%s"
source_gdb_tag = r"Zone%s_%s_%02d.gdb" 
source_gdb_str = os.path.join(source_gdb_folder,source_gdb_tag)

# folder location for 2015-2016
source_tag_1516=r"%s\AIS_%s_%02d_Zone%02d.zip" # year year month zone 
dest_tag_1516=source_gdb_folder # year month month year 
source_str_1516 = os.path.join(data_dir,source_gdb_folder,r"AIS_%s_%02d_Zone%02d.csv")

def main() :
    
    start = time.time()

    arcpy.env.workspace = data_dir

    # MONTHS
    months = range(1,13)

    # ZONES
    zones = [3,4,5,6,7,8,9,10,11] # includes AK and HI
	
    # YEARS 
    years = [2009,2010,2011,2012,2013,2014]

    #TESTING
    #months = [2]
    #zones = [3,4,5,6,7,8,9,10,11] # includes AK and HI
    #years = [2009]
        

    # create output folders and gdbs
    print "Creating output directories and gdbs"
    for year in years :
        folder = os.path.join( out_folder  % (year)  )
        # create folders if needed
        if not os.path.exists( folder ) :
            os.makedirs( folder )
            print "made directory: %s" % folder
 
        for month in months :
            gdb_path = os.path.join(out_folder % (year) , out_gdb % (month))
            if not arcpy.Exists(gdb_path) == True:
                print  "Creating %s" % gdb_path
                arcpy.CreateFileGDB_management( out_folder % (year) , out_gdb % (month) )
                                
                #arcpy.Delete_management (gdb_path)
                #print arcpy.GetMessages()
                #print arcpy.GetMessages()

        # unzip all AIS files
        unzip(zones,years,months)
    
    # make list of year and months (2008.12) is 12/2008
    years_months = []
    for y in years :
        for m in months:
            years_months.append(float(y) + float(m)/100)

    print "Running the following years/months"
    print years_months
    
    # process lines (one process for each month in a year)
    worker = partial (make_lines_month_v2, zones=zones)
    pool = multiprocessing.Pool(CORES)       
    pool.map(worker,years_months)
    pool.close()
    pool.join() 
            
    print("--- %s seconds ---" % (time.time() - start))


      
def run_track_builder(points,vesselTable,outFolder,outFile) :

    # creating tracks using Track Builder
    sInputFile = points # can use feature class here. Saves having to load the layer 
    sOutWorkspace = outFolder   # os.path.join(out_folder % (year) , out_gdb % (month)) #os.path.join(data_dir,out_gdb) #r"E:\data\temp.gdb"
    sOutName = outFile          # out_str % ( year, month ) 
    sLineMethod = "Maximum Time and Distance" # "Time Interval" 
    #iMaxTimeMinutes = 120 # first version this set to 60
    #iMaxDistMiles = 20   # first version this set to 20
    iIntervalHours = 1 # unused

    # don't add vessel data if vesselTable is empty
    if vesselTable is None:
        sAddVesselData = "false"
    else:
        sAddVesselData = "true"
    

    sAddVoyageData = "false"
    voyageMethod = "" #"Most Frequent" # "First"
    voyageTable = ""    #voyage_source

    print "Making Tracks"
    TB.RunTrackBuilder(sInputFile=sInputFile, sOutWorkspace=sOutWorkspace, sOutName=sOutName,sLineMethod=sLineMethod,
                iIntervalHours=iIntervalHours,
                sAddVesselData=sAddVesselData, vesselTable=vesselTable,
               voyageMethod = voyageMethod , sAddVoyageData = sAddVoyageData , voyageTable = voyageTable )



def make_lines_month_v2(year_month,zones) :

    # extracting year and month
    year = round(year_month) # gets year
    month = round( (year_month - year ) * 100 )  # gets month

    year = int(year)
    month = int(month)

    # range of points to load
    selector = 'SOG>2.5 AND MMSI>=201000000 AND MMSI<=775999999 AND EXTRACT(YEAR FROM BaseDateTime)=%s AND EXTRACT(MONTH FROM BaseDateTime)=%s' % (year,month) # getting all tracks in month
    #selector = 'SOG>2.5 AND MMSI=211766000 AND EXTRACT(YEAR FROM BaseDateTime)=%s AND EXTRACT(MONTH FROM BaseDateTime)=%s' % (year,month) # getting all tracks in month
    # according to moore et al MMSI only valid for 201000000-775999999

    # starting timer
    t_s = time.time()

    print "selecting points"
    merge_list = [] ;
    merge_list_vessel = [] ;
    for zone in zones :

        # location of points and other tables
        points_feat = "points_%s" % zone # where to store points
        merge_list.append(points_feat)
        if year<2015 :
            points_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Broadcast")
            arcpy.management.MakeFeatureLayer(points_source,points_feat,selector) #  getting points for each zone and copying to memory
           
            #voyage_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Voyage") 
            vessel_source_z = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Vessel")
            merge_list_vessel.append(vessel_source_z)
        else :
            points_source = os.path.join(source_str_1516 % (year,month,calendar.month_name[month],year,year,month,zone))
            arcpy.management.MakeTableView(points_source,points_feat,'SOG>2.5 AND MMSI>=201000000 AND MMSI<=775999999') #  getting points for each zone and copying to memory       

    print("--- %s seconds ---" % (time.time() - t_s))

    print "merging"
    # merge Points
    merged_lyr = os.path.join( "in_memory", "merged_%s_%s" % (year,month) )
    if year<2015 :
        # only merge vessel tables if pre 2015
        merged_vessel_table = os.path.join( "in_memory", "merged_vessel_%s_%s" % (year,month) )
        # merge tables and remove duplicates
        arcpy.Merge_management(merge_list_vessel,merged_vessel_table)
        arcpy.DeleteIdentical_management(merged_vessel_table, ["MMSI"])
        arcpy.Merge_management(merge_list,merged_lyr)
    else:
        merged_vessel_table=None
        merged_table = os.path.join( "in_memory", "merged_tbl_%s_%s" % (year,month) )
        arcpy.Merge_management(merge_list,merged_table)
        arcpy.MakeXYEventLayer_management(merged_table, "LON", "LAT", "merged_lyr", spatial_reference=spatialRef )
        #arcpy.management.Delete(merged_table)

    print("--- %s seconds ---" % (time.time() - t_s))

    # saving if desired    
##    print "Saving"
##    arcpy.FeatureClassToGeodatabase_conversion ("merged_lyr", r"G:\data\temp2.gdb")
##    #arcpy.TableToGeodatabase_conversion (merged_vessel_table, r"E:\data\temp.gdb")
##    #arcpy.Copy_management (merged_lyr, r"E:\data\temp.gdb\joined_0930_test")
##    print("--- %s seconds ---" % (time.time() - t_s))

    # creating tracks using Track Builder
    outFolder = os.path.join(out_folder % (year) , out_gdb % (month)) #os.path.join(data_dir,out_gdb) #r"E:\data\temp.gdb"
    outFile = out_str % ( year, month ) 
    run_track_builder(merged_lyr,merged_vessel_table,outFolder,outFile)
       
    arcpy.management.Delete(merged_lyr)
    if year<2015:
        arcpy.management.Delete(merged_vessel_table)
    
    print("--- %s seconds ---" % (time.time() - t_s))


    
''' 
# Unused

def merge_year(year,zones,months) :

    # starting timer
    t_s = time.time()

    print "selecting points"
    merge_list = [] ;
    merge_list_vessel = [] ;
    for month in months :

        # range of points to load
        selector = 'SOG>2.5 AND MMSI>=201000000 AND MMSI<=775999999 AND EXTRACT(YEAR FROM BaseDateTime)=%s AND EXTRACT(MONTH FROM BaseDateTime)=%s' % (year,month) # getting all tracks in month
        # according to moore et al MMSI only valid for 201000000-775999999
        
        for zone in zones :

            # location of points and other tables
            points_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Broadcast")
            vessel_source_z = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Vessel")
            #voyage_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Voyage") 

            ### where to store points
            #points_temp = os.path.join("in_memory","points_%s" % zone) # out_gdb
            #lines_out = os.path.join(data_dir,out_gdb,out_str % (zone, year, month, day) ) 
            points_feat = "points_%s_%s" % (zone,month)

            merge_list.append(points_feat)
            merge_list_vessel.append(vessel_source_z)

            # getting points for each zone and copying to memory
            arcpy.management.MakeFeatureLayer(points_source,points_feat,selector) 

    print("--- %s seconds ---" % (time.time() - t_s))

    print "merging"
    merged_folder = r"E:\data\temp2.gdb"
    merged_lyr = os.path.join( merged_folder , "merged_%s" % (year) )
    merged_vessel_table = os.path.join( merged_folder , "merged_vessel_%s" % (year) )

    # merge layers
    arcpy.Merge_management(merge_list,merged_lyr)

    # merge tables and remove duplicates
    arcpy.Merge_management(merge_list_vessel,merged_vessel_table)
    arcpy.DeleteIdentical_management(merged_vessel_table, ["MMSI"])

    print("--- %s seconds ---" % (time.time() - t_s))



def make_lines_month(month,zone,year) :
    days = range(1,calendar.monthrange(year,month)[1]+1)
    for day in days :
        # only creating lines if day doesn't exist
        if not arcpy.Exists( os.path.join(out_folder % (year) , out_gdb % (month) ,out_str % (zone, year, month, day) ) ) :
            make_lines(day,zone,year,month)    
    
def make_lines(day,zone,year,month) :

    # starting timer
    t_s = time.time()

    # location of points and other tables
    points_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Broadcast")
    vessel_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Vessel")
    voyage_source = os.path.join(source_gdb_str % (year,month,calendar.month_name[month],year,zone,year,month),"Voyage") 

    ### where to store points
    points_temp = os.path.join("in_memory","points") # out_gdb
    #lines_out = os.path.join(data_dir,out_gdb,out_str % (zone, year, month, day) ) 

    # range of points to load
    #date_range = 'EXTRACT(YEAR FROM BaseDateTime)=%s AND EXTRACT(MONTH FROM BaseDateTime)=%s AND EXTRACT(DAY FROM BaseDateTime)=%s' % (year,month,day)
    date_range = 'SOG>0 AND EXTRACT(YEAR FROM BaseDateTime)=%s AND EXTRACT(MONTH FROM BaseDateTime)=%s' % (year,month) # getting all tracks in month

    print "selecting points"
    # getting points for single day and copying to memory
    #print "Accessing Layers"
    arcpy.management.MakeFeatureLayer(points_source,"points_feat",date_range) 

    #arcpy.management.CopyFeatures("points_feat",points_temp)

    print("--- %s seconds ---" % (time.time() - t_s))

    # creating tracks using Track Builder
    sInputFile = "points_feat" # can use feature class here. Saves having to load the layer # points_temp
    #sInputFile = points_temp 
    sOutWorkspace = os.path.join(out_folder % (year) , out_gdb % (month)) #os.path.join(data_dir,out_gdb) #r"E:\data\temp.gdb"
    sOutName = out_str % (zone, year, month, day) 
    sLineMethod = "Maximum Time and Distance" # "Time Interval" 
    #iMaxTimeMinutes = 120 # first version this set to 60
    #iMaxDistMiles = 20   # first version this set to 20
    iIntervalHours = 1  # not used
    sAddVesselData = "true"
    vesselTable = vessel_source
    voyageMethod = "Most Frequent" # "First"
    sAddVoyageData = "true"
    voyageTable = voyage_source

    print "Making Tracks"
    TB.RunTrackBuilder(sInputFile=sInputFile, sOutWorkspace=sOutWorkspace, sOutName=sOutName,sLineMethod=sLineMethod,
                iIntervalHours=iIntervalHours,
                sAddVesselData=sAddVesselData, vesselTable=vesselTable,
               voyageMethod = voyageMethod , sAddVoyageData = sAddVoyageData , voyageTable = voyageTable ) 

    arcpy.management.Delete(points_temp)
    
    print("--- %s seconds ---" % (time.time() - t_s))
'''


def unzip(zones,years,months):
    ''' Function to unzip required data from all folders
        Will only unzip if required gdb is not already there
    '''   
    for year in years :
        for m in months :
            unzip_month(m,zones,year)
       
def unzip_month(m,zones,year) :
        
        folder = os.path.join(data_dir, source_gdb_folder %( year, m , calendar.month_name[m] , year ) )

        print folder
        
        if not os.path.isdir(folder) :
            print "Renaming"
            folder_to_rename = os.path.join(data_dir,"%s" % year,"%02d" %( m ) )
            os.rename(folder_to_rename,folder)
        
        os.chdir(folder) # change directory from working dir to dir with files

        for zone in zones:
            name = tag %(zone,year,m)
            item = name + ".zip"
            if not os.path.isfile(item):
                item = name + ".gdb.zip"
            #for item in os.listdir(folder) : #
                           
            file_name = os.path.abspath(item)
            if not os.path.isdir( os.path.join(folder,name+".gdb") ) :
                #print os.path.join(folder,name+".gdb")
                print "Extracting: %s" %(item)    
                z = zipfile.ZipFile(file_name)                                    
                z.extractall(folder)
                z.close()
            else :
                print "Exists: %s" %(item)


if __name__ == '__main__':
    main()














